Motivation

IMAGE ALT TEXT HERE

Data wrangling

Loading the gapminder and dplyr packages

Lade die Bibliotheken gapminder und dplyr.

# Load the gapminder package
library(gapminder)

# Load the dplyr package
library(dplyr)

# Look at the gapminder dataset: evaluating the bare name at top level makes
# R display the object.
gapminder

Understanding a data frame

Zähle die Zeilen des Datensatzes.

# Number of observations: the first element of dim() is the row count
dim(gapminder)[1L]
## [1] 1704

The pipe

IMAGE ALT TEXT HERE

The filter verb

Filtering for one year

Filtere die Daten des Jahres 1957 heraus.

# Keep only the rows of gapminder whose year is 1957
filter(gapminder, year == 1957)

Filtering for one country and one year

Erstelle zwei Filter für das Jahr 2002 und das Land China.

# Keep only China's rows, then narrow those down to the year 2002
gapminder %>%
  filter(country == "China") %>%
  filter(year == 2002)

The arrange verb

Arranging observations by life expectancy

Sortiere die Daten nach Lebenserwartung. Wie sehen die Daten aus? Sortiere die Daten nochmals, diesmal in absteigender Reihenfolge.

# Order rows from lowest to highest lifeExp
arrange(gapminder, lifeExp)

# Order rows from highest to lowest lifeExp
arrange(gapminder, desc(lifeExp))

Filtering and arranging

Filtere zuerst die Daten aus dem Jahr 1957 heraus und sortiere sie dann absteigend nach Bevölkerungszahl.

# Restrict to 1957, then order by population from largest to smallest
gapminder %>%
  filter(year == 1957) %>%
  arrange(desc(pop))

The mutate verb

Using mutate to change or create a column

Erstelle eine neue Variable, die die Lebenserwartung in Monaten zeigt. Nenne die Variable lifeExpMonths.

# Use mutate to change lifeExp to be in months.
# Note: in R, `**` is parsed as `^` (exponentiation), so the conversion must
# use `*` (months = 12 * years).
gapminder %>%
  mutate(lifeExp = 12 * lifeExp)

# Use mutate to create a new column called lifeExpMonths
gapminder %>%
  mutate(lifeExpMonths = 12 * lifeExp)

Combining filter, mutate, and arrange

Kombiniere alle drei bisher gelernten Verben. Filtere zuerst das Jahr 2007 heraus, leite dann aus der Lebenserwartung eine neue Variable auf Monatsbasis ab und sortiere anschließend absteigend nach der neuen Variable.

# Chain all three verbs: keep 2007, derive life expectancy in months,
# then order by that new column from highest to lowest
gapminder %>%
  filter(year == 2007) %>%
  mutate(lifeExpMonths = lifeExp * 12) %>%
  arrange(desc(lifeExpMonths))

Data visualization

IMAGE ALT TEXT HERE

Variable assignment

Lade das Paket ggplot2. Filtere die Daten aus dem Jahr 1952 heraus und speichere sie in einem neuen Datensatz gapminder_1952.

# Load the ggplot2 package as well
library(ggplot2)

# Store the 1952 observations in gapminder_1952
gapminder_1952 <- filter(gapminder, year == 1952)

Comparing population and GDP per capita

Nimm den neuen Datensatz und mache eine Punktgrafik mit gdpPercap auf der x-Achse und lifeExp auf der y-Achse. Tausche auf der x-Achse gdpPercap mit pop und auf der y-Achse lifeExp mit gdpPercap aus.

# Subset of gapminder for the year 1952
gapminder_1952 <- filter(gapminder, year == 1952)

# Starting plot: gdpPercap on the x-axis, lifeExp on the y-axis
gapminder_1952 %>%
  ggplot(aes(x = gdpPercap, y = lifeExp)) +
  geom_point()

# Changed plot: pop on the x-axis, gdpPercap on the y-axis
gapminder_1952 %>%
  ggplot(aes(x = pop, y = gdpPercap)) +
  geom_point()

Comparing population and life expectancy

Mache dieselbe Grafik mit pop und lifeExp.

# Subset of gapminder for the year 1952
gapminder_1952 <- filter(gapminder, year == 1952)

# Scatter plot: pop on the x-axis, lifeExp on the y-axis
gapminder_1952 %>%
  ggplot(aes(x = pop, y = lifeExp)) +
  geom_point()

Log Scales

Putting the x-axis on a log scale

Logarithmiere die x-Achse. Wird die Grafik dadurch besser?

# Subset of gapminder for the year 1952
gapminder_1952 <- filter(gapminder, year == 1952)

# Same pop vs. lifeExp scatter plot, with the x-axis on a log10 scale
gapminder_1952 %>%
  ggplot(aes(x = pop, y = lifeExp)) +
  geom_point() +
  scale_x_log10()

Putting the x- and y- axes on a log scale

Logarithmiere beide Achsen.

# Subset of gapminder for the year 1952
gapminder_1952 <- filter(gapminder, year == 1952)

# pop vs. gdpPercap scatter plot with both axes on a log10 scale
gapminder_1952 %>%
  ggplot(aes(x = pop, y = gdpPercap)) +
  geom_point() +
  scale_x_log10() +
  scale_y_log10()

Adding color to a scatter plot

Füge eine dritte ästhetische Komponente hinzu. Färbe die Grafik nach Kontinenten ein.

# Subset of gapminder for the year 1952
gapminder_1952 <- filter(gapminder, year == 1952)

# pop vs. lifeExp scatter plot, points coloured by continent;
# both axes are drawn on a log10 scale
gapminder_1952 %>%
  ggplot(aes(x = pop, y = lifeExp, color = continent)) +
  geom_point() +
  scale_x_log10() +
  scale_y_log10()

Adding size and color to a plot

Füge eine vierte ästhetische Komponente hinzu. Mache die Punktgröße vom gdpPercap abhängig.

# Subset of gapminder for the year 1952
gapminder_1952 <- filter(gapminder, year == 1952)

# pop vs. lifeExp with colour mapped to continent and point size mapped
# to gdpPercap; x-axis on a log10 scale
gapminder_1952 %>%
  ggplot(
    aes(x = pop, y = lifeExp, color = continent, size = gdpPercap)
  ) +
  geom_point() +
  scale_x_log10()

Faceting

Creating a subgraph for each continent

Separiere die Grafik in Facetten für jeden Kontinent.

# Subset of gapminder for the year 1952
gapminder_1952 <- filter(gapminder, year == 1952)

# pop vs. lifeExp (log10 x-axis), one panel per continent
gapminder_1952 %>%
  ggplot(aes(x = pop, y = lifeExp)) +
  geom_point() +
  scale_x_log10() +
  facet_wrap(~continent)

Faceting by year

Separiere die Grafik in Facetten für jedes Jahr.

# gdpPercap vs. lifeExp for the full dataset: colour shows continent,
# point size shows population, x-axis is log10, one panel per year
gapminder %>%
  ggplot(
    aes(x = gdpPercap, y = lifeExp, color = continent, size = pop)
  ) +
  geom_point() +
  scale_x_log10() +
  facet_wrap(~year)


Endgegner

# install.packages('devtools')
#devtools::install_github('thomasp85/gganimate')

library(gapminder)
library(gganimate)

# Animated bubble chart: gdpPercap (log10 x-axis) vs. lifeExp, bubble size
# from pop, one colour per country (country_colors palette), one panel per
# continent
ggplot(
  data = gapminder,
  mapping = aes(x = gdpPercap, y = lifeExp, size = pop, colour = country)
) +
  geom_point(alpha = 0.7, show.legend = FALSE) +
  scale_colour_manual(values = country_colors) +
  scale_size(range = c(2, 12)) +
  scale_x_log10() +
  facet_wrap(~continent) +
  # gganimate-specific part: the title shows the current frame time, frames
  # advance over `year`, and values are eased linearly between frames
  labs(
    title = "Year: {frame_time}",
    x = "GDP per capita",
    y = "life expectancy"
  ) +
  transition_time(year) +
  ease_aes("linear")


Grouping and summarizing

Summarizing the median life expectancy

# Collapse the whole dataset into a single row: the median life expectancy
summarize(gapminder, medianLifeExp = median(lifeExp))

Summarizing the median life expectancy in 1957

# Median life expectancy computed over the 1957 rows only
gapminder %>%
  filter(year == 1957) %>%
  summarize(medianLifeExp = median(lifeExp))

Summarizing multiple variables in 1957

# For 1957 only: median life expectancy and maximum GDP per capita
gapminder %>%
  filter(year == 1957) %>%
  summarize(
    medianLifeExp = median(lifeExp),
    maxGdpPercap = max(gdpPercap)
  )

group_by verb

Summarizing by year

# One summary row per year: median life expectancy and maximum GDP per capita
gapminder %>%
  group_by(year) %>%
  summarize(
    medianLifeExp = median(lifeExp),
    maxGdpPercap = max(gdpPercap)
  )

Summarizing by continent

# One summary row per continent for 1957: median life expectancy and
# maximum GDP per capita
gapminder %>%
  filter(year == 1957) %>%
  group_by(continent) %>%
  summarize(
    medianLifeExp = median(lifeExp),
    maxGdpPercap = max(gdpPercap)
  )

Summarizing by continent and year

# One summary row per continent/year combination: median life expectancy
# and maximum GDP per capita
gapminder %>%
  group_by(continent, year) %>%
  summarize(
    medianLifeExp = median(lifeExp),
    maxGdpPercap = max(gdpPercap)
  )

Visualizing summarized data

Visualizing median life expectancy over time

# Per-year summary: median life expectancy and maximum GDP per capita
by_year <- summarise(
  group_by(gapminder, year),
  medianLifeExp = median(lifeExp),
  maxGdpPercap = max(gdpPercap)
)

# Scatter plot of medianLifeExp over time; the y-axis is extended to include 0
ggplot(by_year, aes(x = year, y = medianLifeExp)) +
  geom_point() +
  expand_limits(y = 0)

Visualizing median GDP per capita per continent over time

# by_year_continent: median GDP per capita for each continent/year combination
by_year_continent <- summarise(
  group_by(gapminder, continent, year),
  medianGdpPercap = median(gdpPercap)
)

# Scatter plot of medianGdpPercap over time, coloured by continent;
# the y-axis is extended to include 0
ggplot(
  by_year_continent,
  aes(x = year, y = medianGdpPercap, color = continent)
) +
  geom_point() +
  expand_limits(y = 0)

Comparing median life expectancy and median GDP per continent in 2007

# by_continent_2007: per-continent median life expectancy and median GDP
# per capita, using the 2007 rows only
by_continent_2007 <- summarise(
  group_by(filter(gapminder, year == 2007), continent),
  medianLifeExp = median(lifeExp),
  medianGdpPercap = median(gdpPercap)
)

# Scatter plot of median GDP per capita against median life expectancy, one
# coloured point per continent; the y-axis is extended to include 0
ggplot(
  by_continent_2007,
  aes(x = medianGdpPercap, y = medianLifeExp, color = continent)
) +
  geom_point() +
  expand_limits(y = 0)

Types of visualizations

Line plots

Visualizing median GDP per capita over time

# by_year: median GDP per capita for each year
by_year <- gapminder %>%
  group_by(year) %>%
  summarize(medianGdpPercap = median(gdpPercap))

# Line plot of medianGdpPercap over time; the y-axis is extended to include 0
ggplot(by_year, aes(x = year, y = medianGdpPercap)) +
  geom_line() +
  expand_limits(y = 0)

Visualizing median GDP per capita by continent over time

# by_year_continent: median GDP per capita for each continent/year combination
by_year_continent <- gapminder %>%
  group_by(continent, year) %>%
  summarize(medianGdpPercap = median(gdpPercap))

# Line plot of medianGdpPercap over time, one coloured line per continent;
# the y-axis is extended to include 0
ggplot(
  by_year_continent,
  aes(x = year, y = medianGdpPercap, color = continent)
) +
  geom_line() +
  expand_limits(y = 0)

Bar Plots

Visualizing median GDP per capita by continent

# by_continent: median GDP per capita for each continent, 1952 rows only
by_continent <- gapminder %>%
  filter(year == 1952) %>%
  group_by(continent) %>%
  summarize(medianGdpPercap = median(gdpPercap))

# Bar plot with one bar per continent, bar height = medianGdpPercap
ggplot(by_continent, aes(x = continent, y = medianGdpPercap)) +
  geom_col()

Visualizing GDP per capita by country in Oceania

# oceania_1952: rows for the Oceania continent in the year 1952
oceania_1952 <- gapminder %>%
  filter(continent == "Oceania") %>%
  filter(year == 1952)

# Bar plot with one bar per country, bar height = gdpPercap
ggplot(oceania_1952, aes(x = country, y = gdpPercap)) +
  geom_col()

Histograms

Visualizing population

# Subset of gapminder for the year 1952
gapminder_1952 <- filter(gapminder, year == 1952)

# Histogram of population (pop) using the default binning
ggplot(gapminder_1952, aes(x = pop)) +
  geom_histogram()
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

Visualizing population with x-axis on a log scale

# Subset of gapminder for the year 1952
gapminder_1952 <- filter(gapminder, year == 1952)

# Histogram of population (pop) with the x-axis on a log10 scale,
# using the default binning
ggplot(gapminder_1952, aes(x = pop)) +
  geom_histogram() +
  scale_x_log10()
## `stat_bin()` using `bins = 30`. Pick better value with `binwidth`.

Box Plots

Comparing GDP per capita across continents

# Subset of gapminder for the year 1952
gapminder_1952 <- filter(gapminder, year == 1952)

# Box plot of gdpPercap per continent, with the y-axis on a log10 scale
ggplot(gapminder_1952, aes(x = continent, y = gdpPercap)) +
  geom_boxplot() +
  scale_y_log10()

Adding a title to your graph

# Subset of gapminder for the year 1952
gapminder_1952 <- filter(gapminder, year == 1952)

# Box plot of gdpPercap per continent (log10 y-axis) with the plot title
# "Comparing GDP per capita across continents"
gapminder_1952 %>%
  ggplot(aes(x = continent, y = gdpPercap)) +
  geom_boxplot() +
  scale_y_log10() +
  ggtitle("Comparing GDP per capita across continents")